from constants import *
from utils import evaluate_model_policy, plot_study, plot_fig
from trainer import get_trained_model
import optuna
from environment import StreetFighterEnv
from stable_baselines3 import PPO, A2C
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from actor_critic import A2CCNNPolicy
from feature_extractors import CNNExtractorWithAttention, CNNExtractor
from tuner import Tuner
import os
# --- Experiment 1: A2C, environment WITHOUT movement capture -----------------
# Runs an Optuna hyperparameter search (gamma / learning_rate / gae_lambda,
# per Tuner's search space) and reports the best trial.

TIMESTEPS = 1000000                    # training timesteps per Optuna trial
N_TRIALS = 20                          # number of hyperparameter trials
PLOTLY_CONFIG = {"staticPlot": True}   # render study plots as static images

model = A2C
model_dir = 'models/without_bias'
# capture_movement=False: the environment's reward ignores player movement
# (the "without bias" variant) — TODO confirm semantics against environment.py
env = StreetFighterEnv(capture_movement=False)
policy_network = A2CCNNPolicy
policy_kwargs = dict(
    features_extractor_class=CNNExtractor,           # plain CNN, no attention
    features_extractor_kwargs=dict(features_dim=512,),
)
tuner = Tuner(model=model, env=env, policy_network=policy_network,
              policy_args=policy_kwargs, timesteps=TIMESTEPS,
              save_dir=model_dir)
study = tuner.tune_study(n_trials=N_TRIALS)
# The original line was a bare tuple expression — a notebook display artifact
# with no effect in a script. Print the result so it is actually visible.
print(study.best_trial.number, study.best_params)
[I 2022-04-17 18:13:29,558] A new study created in memory with name: no-name-58d70f2e-fd2e-41f5-aae1-f1f0bc027e2b
[I 2022-04-17 18:14:03,811] Trial 0 finished with value: 0.0 and parameters: {'gamma': 0.9407307776752445, 'learning_rate': 1.7040174763637843e-05, 'gae_lambda': 0.9288876486994231}. Best is trial 0 with value: 0.0. [I 2022-04-17 18:14:43,152] Trial 1 finished with value: 0.0 and parameters: {'gamma': 0.8182398997530516, 'learning_rate': 2.672128578161328e-05, 'gae_lambda': 0.8307251461086189}. Best is trial 0 with value: 0.0.
(0,
{'gamma': 0.9407307776752445,
'learning_rate': 1.7040174763637843e-05,
'gae_lambda': 0.9288876486994231})
# Render each diagnostic figure for the study (history, importances, ...)
# as a static Plotly chart inside the notebook.
plots = plot_study(study)
for fig in plots:
    fig.show("notebook", config=PLOTLY_CONFIG)
# --- Experiment 2: A2C, environment WITH movement capture --------------------
# Same search as Experiment 1, but the environment rewards movement
# (capture_movement=True). Models are saved under a separate directory.

model = A2C
model_dir = 'models/without_bias_with_movement'
# capture_movement=True: reward also accounts for player movement —
# TODO confirm semantics against environment.py
env = StreetFighterEnv(capture_movement=True)
policy_network = A2CCNNPolicy
policy_kwargs = dict(
    features_extractor_class=CNNExtractor,           # plain CNN, no attention
    features_extractor_kwargs=dict(features_dim=512,),
)
tuner = Tuner(model=model, env=env, policy_network=policy_network,
              policy_args=policy_kwargs, timesteps=TIMESTEPS,
              save_dir=model_dir)
study = tuner.tune_study(n_trials=N_TRIALS)
# The original line was a bare tuple expression — a notebook display artifact
# with no effect in a script. Print the result so it is actually visible.
print(study.best_trial.number, study.best_params)
[I 2022-04-17 18:14:43,485] A new study created in memory with name: no-name-76061847-ccf4-4532-94e7-046af9b8c4ec
[I 2022-04-17 18:15:27,875] Trial 0 finished with value: 2500.0 and parameters: {'gamma': 0.8637347690278583, 'learning_rate': 1.76145797657027e-05, 'gae_lambda': 0.8570043829822641}. Best is trial 0 with value: 2500.0. [I 2022-04-17 18:16:06,860] Trial 1 finished with value: 3500.0 and parameters: {'gamma': 0.8420325068057789, 'learning_rate': 1.0736082514244527e-05, 'gae_lambda': 0.8663228666125405}. Best is trial 1 with value: 3500.0.
(1,
{'gamma': 0.8420325068057789,
'learning_rate': 1.0736082514244527e-05,
'gae_lambda': 0.8663228666125405})
# Render each diagnostic figure for the movement-capture study
# as a static Plotly chart inside the notebook.
plots = plot_study(study)
for fig in plots:
    fig.show("notebook", config=PLOTLY_CONFIG)